# coding=utf-8

import json
import sys

from api import utils
from api import elastics

# Python 2 compatibility shim: force the interpreter-wide default string
# encoding to UTF-8. ``reload(sys)`` is needed because Python 2's startup
# removes ``sys.setdefaultencoding`` from the module namespace.
# On Python 3 ``reload`` is not a builtin (NameError) and
# ``sys.setdefaultencoding`` does not exist (AttributeError); the default
# encoding is already UTF-8 there, so those two failures are safely ignored.
# Any other exception is unexpected and is allowed to propagate.
try:
    reload(sys)
    sys.setdefaultencoding("utf-8")
except (NameError, AttributeError):
    pass


# Path of the JSON file that store() writes and load() reads; being a
# relative path, it is resolved against the current working directory.
FILENAME = 'data_ex2.json'


def store(data):
    """Serialize ``data`` to JSON and overwrite ``FILENAME`` with the result.

    The file is opened (and therefore truncated) before serialization, so a
    value that cannot be encoded as JSON leaves an empty file behind.
    """
    with open(FILENAME, 'w') as out:
        payload = json.dumps(data)
        out.write(payload)


def load():
    """Read ``FILENAME`` and return the value decoded from its JSON text."""
    with open(FILENAME) as source:
        raw = source.read()
    return json.loads(raw)


def build_query():
    """Return the Elasticsearch query body selecting the documents to export.

    Only documents whose ``project`` term equals ``epochtimes`` and that
    have a ``result.author`` field match.
    """
    return {
        "query": {
            "bool": {
                "filter": [
                    {'term': {'project': 'epochtimes'}},
                    {"exists": {"field": "result.author"}},
                ]
            },
        }
    }


def collect_documents(docs, format_content, min_length=100):
    """Normalize documents and keep those with enough content.

    Each document is mutated in place: its ``content`` value is replaced by
    ``format_content(content)`` and its ``html`` key (if any) is removed.

    Args:
        docs: iterable of dict documents; every one must have a ``content``
            key (a missing key raises ``KeyError``).
        format_content: callable applied to each ``content`` value.
        min_length: documents whose formatted content is shorter than this
            are skipped.

    Returns:
        A list with the kept documents, in iteration order.
    """
    kept = []
    for position, doc in enumerate(docs, 1):
        doc['content'] = format_content(doc['content'])
        if len(doc['content']) < min_length:
            continue

        # Drop the raw HTML unconditionally; it is not part of the export.
        doc.pop('html', None)
        kept.append(doc)

        # In-place progress: `position` is the number of documents fetched so
        # far. Written without a newline so '\r' really rewinds the line
        # (print() would append '\n' and defeat the carriage return).
        sys.stdout.write('\r已处理：%d' % position)
        sys.stdout.flush()

    if kept:
        # Terminate the progress line.
        sys.stdout.write('\n')
    return kept


def main():
    """Export the matching Elasticsearch documents to ``FILENAME``."""
    import os  # local import keeps this block self-contained

    # NOTE(review): the fallback URL embeds a plaintext password that is
    # committed to source control. Prefer setting RESULTDB_URL in the
    # environment; the credential below should be rotated and removed.
    url = os.environ.get(
        'RESULTDB_URL',
        'es+resultdb://elastic:NklO37aCJUESRaseGye7ecPOybfJsf@192.168.23.40:9200/?index=resultdb')
    client = elastics.get_client(url)

    documents = collect_documents(
        client.select_by_query(build_query()), utils.format_content)
    store({'data': documents})


if __name__ == '__main__':
    main()

    # all_info, all_doc_handled = elastics.get_editor_and_author(client, 'secretchina_kanguanchang', 0)
